from .models import WordEntry

# https://nl.wiktionary.org/wiki/Categorie:Lemmasjablonen
# https://nl.wiktionary.org/wiki/Categorie:Werkwoordsjablonen
# Immutable set of template names; only membership matters, so it is built
# directly from a set literal.
GLOSS_TAG_TEMPLATES = frozenset(
    {
        "absol",
        "accus",
        "auxl",
        "copl",
        "deponens",
        "ditr",
        "erga",
        "inerg",
        "intr",
        "modl",
        "onpr",
        "ov",
        "rcpq",
        "refl",
        "s-verb",
        "plurt",
        "singt",
        "versterkend voorvoegsel",
    }
)


# https://nl.wiktionary.org/wiki/Categorie:Werkwoordsjablonen
# https://nl.wiktionary.org/wiki/Categorie:WikiWoordenboek:Contextlabels
# Maps a Dutch label string to the English tag(s) that replace it.
# A value is either a single tag string or a list of tag strings.
# This table is merged into TAGS further down in this module.
# Commented-out entries are labels that are currently not mapped
# (presumably kept as placeholders for later review).
GLOSS_TAGS = {
    "figuurlijk": "figuratively",
    "afkorting": "abbreviation",
    "causatief": "causative",
    # "chattaal": "",
    "dichterlijk": "poetic",
    "eufemisme": "euphemistic",
    "familienaam": "surname",
    "formeel": "formal",
    "gezegde": "proverb",
    # "heteroniem": "heteronym",
    "historisch": "historical",
    "informeel": "informal",
    "initiaalwoord": "acronym",
    # "klemtoonhomogram": "",
    "krachtterm": "vulgar",
    # "leesteken": "punctuation",
    "letterwoord": "acronym",
    "middeleeuwen": "Middle-Ages",
    "vrouwelijke naam": ["feminine", "name"],
    "mannelijke naam": ["masculine", "name"],
    "mannelijke en vrouwelijke naam": ["masculine", "feminine", "name"],
    "neologisme": "neologism",
    # NOTE(review): "oudheid" looks like the historical period (antiquity)
    # rather than the register "archaic" - confirm the intended tag.
    "oudheid": "archaic",
    # "palindroom": "palindrome",
    "pejoratief": "pejorative",
    "persoon": "person",
    # "pregnant": "extra meaning",
    "samenkoppeling": "compound",
    # "sanitair": "",
    "scheldwoord": "pejorative",
    "schertsend": "humorous",
    "spottend": "ironic",
    "spreektaal": "vernacular",
    "spreekwoord": "proverb",
    # "stopwoord": "filled pause",
    "straattaal": "slang",
    "streektaal": "regiolectal",
    "taal": "linguistics",
    "toponiem": "toponymic",
    "verkorting": "clipping",
    "verouderd": "obsolete",
    "Vroegnieuwnederlands": "Early-Modern-Dutch",
    "vulgair": "vulgar",
    "zegswijze": "idiomatic",
    "zeldzaam": "rare",
    "Latijns-Amerika": "Latin-America",
    # Grammatical labels; the trailing comments name the wiki template each
    # label is associated with.
    "absoluut": "absolute",  # Sjabloon:absol
    "accusatief": "accusative",  # Sjabloon:accus
    "hulpwerkwoord": "auxiliary",  # Sjabloon:auxl
    "koppelwerkwoord": "copulative",  # Sjabloon:copl
    "deponens": "deponent",
    "ditransitief": "ditransitive",  # Sjabloon:ditr
    "ergatief": "ergative",  # Sjabloon:erga
    "inergatief": "unergative",  # Sjabloon:inerg
    "onovergankelijk": "intransitive",  # Sjabloon:intr
    "modaal werkwoord": ["modal", "verb"],  # Sjabloon:modl
    "onpersoonlijk": "impersonal",  # Sjabloon:onpr
    "overgankelijk": "transitive",  # Sjabloon:ov
    "wederkerig": "reciprocal",  # Sjabloon:rcpq
    "wederkerend": "reflexive",  # Sjabloon:refl
    "alleen meervoud": "plural-only",  # Sjabloon:plurt
    "geen meervoud": "no-plural",  # Sjabloon:singt
    "versterkend voorvoegsel": ["intensifier", "prefix"],
    "in een bijzin": "with-subordinate-clause",  # Sjabloon:ovt-mv-bijz
    "bij inversie": "inversion",  # Sjabloon:1ps
    # Regional labels.
    "Noord-Nederland": "Northern-Netherland",
    "Vlaanderen": "Flanders",
    "Brabant": "Brabant",
    "Limburg": "Limburg",
}

# Maps Dutch labels to English tag(s), grouped by the wiki template named in
# each section comment. A value is either a single tag string or a list of
# tag strings. This table is merged into TAGS further down in this module;
# "accusatief" also appears in GLOSS_TAGS with the same value.
TABLE_TAGS = {
    # Sjabloon:-nlnoun-
    "enkelvoud": "singular",
    "meervoud": "plural",
    "verkleinwoord": "diminutive",
    "bezitsvorm": "possessive",
    # Sjabloon:adjcomp
    "stellend": "positive",
    "vergrotend": "comparative",
    "overtreffend": "superlative",
    "onverbogen": "uninflected",
    "verbogen": "inflected",
    "partitief": "partitive",
    # Sjabloon:-nlverb-
    "onbepaalde wijs": "infinitive",
    "kort": "short-form",
    "lang": "long-form",
    "onvoltooid": "imperfect",
    "tegenwoordig": "present",
    "toekomend": "future",
    "voltooid": "perfect",
    "onvoltooid deelwoord": ["imperfect", "participle"],
    "voltooid deelwoord": ["past", "participle"],
    "gebiedende wijs": "imperative",
    "aanvoegende wijs": "subjunctive",
    "aantonende wijs": "indicative",
    "eerste": "first-person",
    "tweede": "second-person",
    "derde": "third-person",
    "verleden": "past",
    "voorwaardelijk": "conditional",
    "hoofdzin": "main-clause",
    "bijzin": "subordinate-clause",
    # Sjabloon:-nlname-
    "nominatief": "nominative",
    "genitief": "genitive",
    # Sjabloon:-denoun-
    "datief": "dative",
    "accusatief": "accusative",
    # Sjabloon:-nlverb-reflex-
    "tegenwoordige tijd": "present",
    "verleden tijd": "past",
    "toekomende tijd": "future",
    "1": "first-person",
    "2": "second-person",
    "3": "third-person",
    # NOTE(review): "voltooid" alone maps to "perfect" above, while this
    # label maps to "past" - confirm the difference is intended.
    "voltooide tijd": "past",
    # Sjabloon:-dumverb-
    "onv. deelwoord": ["imperfect", "participle"],
    "volt deelwoord": ["past", "participle"],
    "aantonend": "indicative",
    "aanvoegend": "subjunctive",
}


# Single label so far; it comes from Sjabloon:dimt.
HEADER_LINE_TAGS = {"dim. tant.": ["diminutive", "noun"]}


# Combined lookup table. Tables are layered in this order, so when a key
# occurs in more than one table the value from the later table wins while the
# key keeps the position of its first occurrence.
TAGS = dict(GLOSS_TAGS)
TAGS.update(TABLE_TAGS)
TAGS.update(HEADER_LINE_TAGS)

# https://nl.wiktionary.org/wiki/Categorie:WikiWoordenboek:Contextlabels
# Maps a Dutch label string to the English topic(s) it stands for.
# A value is either a single topic string or a list of topic strings.
# Several Dutch labels may share one English topic (e.g. "business", "botany").
# Commented-out entries are labels that are currently not mapped
# (presumably kept as placeholders for later review).
# NOTE(review): most multi-word values are hyphenated, but "computer sciences",
# "law enforcement" and "textual criticism" contain spaces - confirm that
# downstream consumers accept both forms.
TOPICS = {
    "aardrijkskunde": "geography",
    "adel": "nobility",
    "anatomie": "anatomy",
    "antropologie": "anthropology",
    "archeologie": "archaeology",
    "astrologie": "astrology",
    "astronomie": "astronomy",
    # "bacteriën": "bacterium",
    # "badminton": "badminton",
    "basketbal": "basketball",
    "bedrijf": "business",
    "bedrijfskunde": "business",  # "business administration",
    # "bedrijfstak": "industrial branch",
    "beeldhouwkunst": "arts",  # "sculpting"
    # "beroep": "profession",
    "beschrijvende plantkunde": "botany",  # "descriptive botany"
    # "bidsprinkhanen": "mantises",
    "biochemie": "biochemistry",
    "biologie": "biology",
    "bloemplanten": "botany",
    "boekbinderij": "bookbinding",
    "boekhouding": "accounting",
    "bosbouw": "forestry",
    "bouwkunde": "architecture",
    # "breukgetal": "",
    "bridge": "bridge",
    # "buideldieren": "marsupial",
    # "buikpotigen": "",
    # "buissnaveligen": "",
    # "buistandigen": "",
    # "cloacadieren": "monotreme",
    "communicatie": "communications",
    # "coniferen": "conifers",
    "cosmetica": "cosmetics",
    "cryptografie": "cryptography",
    # "cultuur": "culture",
    "dag": "weekday",
    "dans": "dance",
    "demografie": "demography",
    "demoniem": "demonym",
    "dichtkunst": "poetry",
    # "dierengeluid": "animal sound",
    "diergeneeskunde": ["veterinary", "medicine"],
    "dierkunde": "zoology",
    # "dierluizen": "",
    "diplomatie": "diplomacy",
    "drinken": "beverages",
    # "duifachtigen": "",
    # "duikers": "",
    # "dysfemisme": "dysphemism",
    "ecologie": "ecology",
    "economie": "economics",
    # "eendvogels": "anseriform",
    "eenheid": "units-of-measure",
    "effectenhandel": "trading",
    "egyptologie": "Egyptology",
    # "toponiem: eiland": "",
    "elektronica": "electronics",
    "elektrotechniek": "electrical-engineering",
    # "element": "element",
    "emotie": "emotion",
    # "evenhoevigen": "",
    # NOTE(review): "familie" looks like it means "family" rather than the
    # register "familiar" - confirm the intended topic.
    "familie": "familiar",
    "farmacologie": "pharmacology",
    # "feest": "party",
    "fietsen": "cycling",
    "filatelie": "philately",
    "filmkunst": "cinematography",
    "filosofie": "philosophy",
    "financieel": "financial",
    # "flamingoachtigen": "",
    "folklore": "folklore",
    "fotografie": "photography",
    # "fruit": "fruit",
    # "futen": "grebe",
    "fysiologie": "physiology",
    "genetica": "genetics",
    # "gentachtigen": "",
    "geologie": "geology",
    "geopolitiek": "geopolitics",
    "gereedschap": "tools",
    "geschiedenis": "history",
    "glaciologie": "glaciology",
    # "godheid": "deity",
    # "graan": "grain",
    "grammatica": "grammar",
    "groente": "vegetable",
    # "grondmechanica": "",
    "haar": "hairstyle",
    "handel": "business",
    "heraldiek": "heraldry",
    "hobby": "hobbies",
    "hoofddeksel": "headgear",
    # "horeca": "",
    "houtbewerking": "woodworking",
    # "huishouden": "housekeeping",
    "imkerij": "beekeeping",
    # "industrie": "industry",
    "informatica": "computer sciences",
    "internet": "Internet",
    # "jaarwisseling": "",
    "jachttaal": "hunting",
    # "jongerentaal": "",
    "juridisch": "legal",
    "kaartspel": "card-games",
    # "kamperen": "camping",
    # "kerst": "Christmas",
    # "kindertaal": "child language",
    "kleding": "clothing",
    "kleur": "colour",
    # "knutselen": "",
    "kookkunst": "culinary",
    # "krachtsport": "",
    "kristallografie": "crystallography",
    # "kruid": "",
    # "kuiperij": "",
    "kunst": "arts",
    "landbouw": "agriculture",
    "landmeetkunde": "surveying",
    "leenstelsel": "feudalism",
    # "leerbewerking": "",
    # "leidekkerij": "",
    "letterkunde": "literature",
    "lhbt": "LGBT",
    "logica": "logic",
    "luchtvaart": "aviation",
    # "maatschappij": "company",
    # "magie": "magic",
    "makelaardij": "real-estate",
    # "materiaalkunde": "materials science",
    # "media": "",
    "medisch": "medicine",
    # "meer": "lake",
    "meetkunde": "geometry",
    "metaalbewerking": "metalworking",
    "metallurgie": "metallurgy",
    "klimatologie": "climatology",
    "meteorologie": "meteorology",
    # "metonymisch": "",
    "meubel": "furniture",
    "mijnbouw": "mining",
    "milieukunde": "ecology",
    "militair": "military",
    "mineraal": "mining",
    "mineralogie": "mineralogy",
    # "misdaad": "crime",
    "mode": "fashion",
    # "molenaarsambacht": "",
    "muziek": "music",
    "muziekinstrument": "music",
    "mycologie": "mycology",
    "mythologie": "mythology",
    "natuurkunde": "physics",
    "neurologie": "neurology",
    "numismatiek": "numismatics",
    "oenologie": "oenology",
    "onderwijs": "education",
    "oorlog": "war",
    "optica": "optics",
    "ordehandhaving": "law enforcement",
    # "paardrijden": "horseriding",
    # "planologie": "planology",
    "plantkunde": "botany",
    "politiek": "politics",
    "post": "mail",
    "psychologie": "psychology",
    "regering": "government",
    "religie": "religion",
    # "ruimtevaart": "space travel",
    "schaak": "chess",
    "scheepvaart": "shipping",
    "scheikunde": "chemistry",
    # "schilderkunst": "painting",
    # "schoeisel": "shoewear",
    "scouting": "scouting",
    "seismologie": "seismology",
    "seksualiteit": "sexuality",
    "sieraad": "jewellery",
    # "slapen": "sleep",
    # "snoepgoed": "candy",
    "sociologie": "sociology",
    # "specerij": "spice",
    "speelgoed": "toys",
    "spel": "games",
    # "spellingsalfabet": "spelling alphabet",
    "spoorwegen": "railways",
    "sport": "sports",
    "statistiek": "statistics",
    # "sterrenbeeld": "constellation",
    "valutanaam": "money",
    "taalkunde": "linguistics",
    "tandheelkunde": "dentistry",
    "techniek": "technology",
    # "teken- en schrijfmateriaal": "",
    "tekstkritiek": "textual criticism",
    "telecommunicatie": "telecommunications",
    "tennis": "tennis",
    "textiel": "textiles",
    "textielindustrie": "textiles",
    "thermodynamica": "thermodynamics",
    # "tijdrekening": "timekeeping",
    "toerisme": "tourism",
    "toneel": "theater",
    "transport": "transport",
    "tuinbouw": "horticulture",
    # "tuinieren": "gardening",
    "typografie": "typography",
    "valkerij": "falconry",
    # "veeteelt": "husbandry",
    "verkeer": "traffic",
    "visserij": "fishing",
    "voeding": "food",
    "voetbal": "football",
    "volleybal": "volleyball",
    # "waterbeheer": "water management",
    "wegenbouw": ["road", "construction"],
    "werelddeel": "continents",
    "werktuigbouwkunde": "mechanical-engineering",
    "wetenschap": "sciences",
    "wielrennen": "cycling",
    # "Wikimedia": "Wikimedia",
    # "wikitaal": "",
    # "windstreek": "",
    # "wintersport": "",
    "wiskunde": "mathematics",
    # "wonen": "",
    "zoötomie": "zootomy",
    "zwemmen": "swimming",
    "toponiem: land": "country",  # Template:land
}


def translate_raw_tags(data: WordEntry) -> None:
    """Move recognised entries of ``data.raw_tags`` into tags and topics.

    Each raw tag is looked up in ``TAGS`` first; on a hit its English tag(s)
    are added to ``data.tags``. Otherwise, if the raw tag is in ``TOPICS``
    and ``data`` has a ``topics`` attribute, its topic(s) are added to
    ``data.topics``. Raw tags that match neither case are kept, in their
    original order, in a new list that replaces ``data.raw_tags``.

    The object is modified in place; nothing is returned.
    """
    unresolved = []
    for label in data.raw_tags:
        # Pick the destination list and the mapped value in one place so the
        # str-vs-list handling below is written only once.
        if label in TAGS:
            target, mapped = data.tags, TAGS[label]
        elif label in TOPICS and hasattr(data, "topics"):
            target, mapped = data.topics, TOPICS[label]
        else:
            unresolved.append(label)
            continue

        if isinstance(mapped, str):
            target.append(mapped)
        elif isinstance(mapped, list):
            target.extend(mapped)
    data.raw_tags = unresolved


# used in translation, linkage and gloss lists
# Maps a short template name to the single English tag it stands for.
LIST_ITEM_TAG_TEMPLATES = {
    # grammatical gender
    "m": "masculine",
    "f": "feminine",
    "n": "neuter",
    "c": "common",
    # grammatical number
    "s": "singular",
    "p": "plural",
    # animacy
    "a": "animate",
    "i": "inanimate",
    # verbal aspect
    "impf": "imperfective",
    "pf": "perfective",
}
